#Get objects
old_ws = ls()

################
#Balance checks#
################

#functions
balance_tests = function(covariate, Tr, cluster) {
  require(MASS)
  require(Matching)
  require(multiwayvcov)
  require(lmtest)
  
  #Do negative binomial?
  do_nb = all(na.omit(covariate) == floor(na.omit(covariate)))
  
  #Do chi-square?
  x_table = table(Tr, covariate)
  do_chisq = (mean(x_table == 0) <= 0.1) | (length(x_table) < 10)
  
  #Treatment index
  t_idx = which(Tr == max(Tr))
  c_idx = which(Tr == min(Tr))
  
  #Create out table
  out = data.table(method = c('lm', 'ks', 'nb', 'chisq'), p = as.numeric(NA))
  #OLS
  lm_fit = lm(covariate ~ Tr)
  out[method == 'lm']$p = coeftest(lm_fit, vcov. = cluster.vcov(lm_fit, cluster))[2,4]
  
  #KS
  ks_fit = ks.boot(covariate[t_idx], covariate[c_idx])
  out[method == 'ks']$p = ks_fit$ks.boot.pvalue
  
  #Negative Binomial
  if (do_nb) {
    nb_fit = glm.nb(covariate ~ Tr)
    out[method == 'nb']$p = coeftest(nb_fit, vcov. = cluster.vcov(nb_fit, cluster))[2,4]
  }
  
  #Chi-Square
  if (do_chisq) {
    chisq_fit = chisq.test(x_table, simulate.p.value = T)
    out[method == 'chisq']$p = chisq_fit$p.value
  }
    
  return(out)
}


#####################
#Election Attributes#
#####################

###########
#Figure D1#
###########

#Keep estimation sample
useTable_podes = mergedTable_podes[!is.na(Violence_1_count)]

u_bw = useTable_podes[(NI_close) & (IT_close), bw] %>% unique %>% sort

out = data.table(bw = u_bw, n = as.numeric(NA), 
                 pr_win = as.numeric(NA), p = as.numeric(NA), 
                 upper = as.numeric(NA), lower = as.numeric(NA))

for (b in u_bw) {
  tab = useTable_podes[(NI_close) & (IT_close) & bw <= b, IT_win] %>% table
  
  if (length(tab) == 2) {
    test = binom.test(rev(tab))
    out[bw %in% b, c('n', 'pr_win', 'p', 'lower', 'upper') := 
          list(test$parameter, test$estimate, test$p.value, test$conf.int[1], test$conf.int[2])]
  }
}

out[, out_ci := (lower > 0.5) | (upper < 0.5)]
out[, r := sqrt(n/pi) / 10]

#Plot binomial test result across bandwidths
pdf("./output/figures/figure_d1.pdf")
p = ggplot(out[!is.na(p)], aes(bw, pr_win))+
  geom_point(aes(colour = out_ci)) +
  geom_ribbon(aes(ymin=lower,ymax=upper),alpha=0.3) + 
  geom_vline(xintercept = 0.01) +
  geom_hline(yintercept = 0.5) + theme_bw() +
  xlab("Bandwidth") + ylab("Binomial Test CI") + 
  guides(colour=guide_legend(title="Reject Null"))
print(p)
dev.off()


###########
#Figure D4#
###########

#Create variables#
##################
useTable_podes[, winner_fr_votes := winner_total_votes - winner_remainder]
useTable_podes[, loser_fr_votes := loser_total_votes - loser_remainder]
useTable_podes[, all_NI_fr_vs := (all_NI_vs - all_NI_remainder) / all_total_votes]
useTable_podes[, all_IT_fr_vs := (all_IT_vs - all_IT_remainder) / all_total_votes]
useTable_podes[(sample_ni_it), IT_fr_vs := ((IT_win)*winner_fr_votes + (NI_win)*loser_fr_votes) / all_total_votes]
useTable_podes[(sample_ni_it), NI_fr_vs := ((NI_win)*winner_fr_votes + (IT_win)*loser_fr_votes) / all_total_votes]
useTable_podes[(sample_ni_it), IT_fr_seats := ((IT_win)*winner_first_round_seats + (NI_win)*loser_first_round_seats)]
useTable_podes[(sample_ni_it), NI_fr_seats := ((NI_win)*winner_first_round_seats + (IT_win)*loser_first_round_seats)]

#Select 1% bw
close_IT = useTable_podes[(sample_ni_it)]

#First round performance: 
cov_list = close_IT[, list(`Islamist (all) first round voteshare` = all_IT_fr_vs, 
                           `Islamist (all) first round seats` = all_IT_fr_seats, 
                           `Islamist (close) first round voteshare` = IT_fr_vs, 
                           `Islamist (close) first round seats` = IT_fr_seats, 
                           `Secular (all) first round voteshare` = all_NI_fr_vs, 
                           `Secular (all) first round seats` = all_NI_fr_seats, 
                           `Secular (close) first round voteshare` = NI_fr_vs, 
                           `Secular (close) first round seats` = NI_fr_seats)]

#Descriptives
Map(function(y) summ_loop(y, cov_list),
    names(cov_list)) %>% 
  rbindlist %>%
  fwrite("./output/descriptives/balance_first_round.csv")

#Test
party_first_round_performance = lapply(cov_list, function(x)
  balance_tests(x, close_IT$IT_win*1, close_IT$cluster)) %>%
  rbindlist(idcol = 'covariate')


pdf('./output/figures/figure_d4.pdf')
p = ggplot(party_first_round_performance, aes(p, covariate, group = method)) +
  geom_jitter(aes(shape = method), width = 0, height = 0.1, size = 3) + 
  scale_shape_manual(values = c('C', 'K', 'L', 'N')) + 
  scale_x_continuous(name="p-value", limits=c(0, 1)) +
  theme_bw() + geom_vline(xintercept = 0.05)
print(p)
dev.off()


###########
#Figure D3#
###########

#Election Attributes:
cov_list = close_IT[, list(`Dapil Seat Quota` = quota, 
                           `Total Votes (#)` = all_total_votes, 
                           `Dapil Seats (#)` = dapil_seats, 
                           `Unallocated Seats after First Round` = remaining_seats, 
                           `Effective Number of Parties` =enp, 
                           `Number of Secular Parties Competing` = NI_count, 
                           `Number of Islamist Parties Competing` =IT_count)]

#Descriptives
Map(function(y) summ_loop(y, cov_list),
    names(cov_list)) %>% 
  rbindlist %>%
  fwrite("./output/descriptives/balance_election_att.csv")

#test
election_attributes = lapply(cov_list, function(x)
  balance_tests(x, close_IT$IT_win*1, close_IT$cluster)) %>%
  rbindlist(idcol = 'covariate')

pdf('./output/figures/figure_d3.pdf')
p = ggplot(election_attributes, aes(p, covariate, group = method)) +
  geom_jitter(aes(shape = method), width = 0, height = 0.1, size = 3) + 
  scale_shape_manual(values = c('C', 'K', 'L', 'N')) + 
  scale_x_continuous(name="p-value", limits=c(0, 1)) +
  theme_bw() + geom_vline(xintercept = 0.05)
print(p)
dev.off()

#####################################
#PODES: Dapil demographic attributes#
#####################################

useTable_podes_balance = mergedTable_podes_balance

#Select 1% bw
close_IT = useTable_podes_balance[(sample_ni_it)]


#############################################
#Balance tests on pre-treatment demographics#
#############################################
cov_list = close_IT[, list(`Households in Agriculture (%)` = econ_hh_ag_pct, 
                           `Households in Slums (%)` = econ_hh_in_slums_pct,
                           `Households with Electricity (%)` = econ_hh_elec_pct,
                           `Households with Electricity (%)` = econ_kab_support_per_cap,
                           `Transfers from Kabupaten Govt (Rp)` = demo_hh,
                           `Secondary Schools per 10k` = demo_scndschl_per_10k,
                           `Average Distance to District Capital` = demo_dcapital_dist,
                           `Villages with Bars/Nightclubs (%)` = att_bars_pct,
                           `Village with Muslim Majority (%)` = relg_majority_muslim_pct,
                           `Mosques per 10k` = relg_mosques_per_10k,
                           `Churches per 10k` = relg_churches_per_10k,
                           `Madrasahs per 10k` = relg_madrasah_per_10k
                           )]

#Descriptives
Map(function(y) summ_loop(y, cov_list),
    names(cov_list)) %>% 
  rbindlist %>%
  fwrite("./output/descriptives/balance_dapil_att.csv")


dapil_attributes = lapply(cov_list, function(x)
  balance_tests(x, close_IT$IT_win*1, close_IT$cluster)) %>%
  rbindlist(idcol = 'covariate')

pdf('./output/figures/figure_d2.pdf')
p = ggplot(dapil_attributes, aes(p, covariate, group = method)) +
  geom_jitter(aes(shape = method), width = 0, height = 0.1, size = 3) + 
  scale_shape_manual(values = c('C', 'K', 'L', 'N')) + 
  scale_x_continuous(name="p-value", limits=c(0, 1)) +
  theme_bw() + geom_vline(xintercept = 0.05)
print(p)
dev.off()

#####################################
#2000 Census Fractionalization ######
#####################################


#Select 1% bw
close_IT = mergedTable_frac_balance[(sample_ni_it)]


#############################################
#Balance tests on pre-treatment demographics#
#############################################
cov_list = close_IT[, list(`Mean Village Ethnic Fractionalization` = ethfractvil, 
                           `Mean Village Religious Fractionalization` = relfractvil
)]

#Descriptives
Map(function(y) summ_loop(y, cov_list),
    names(cov_list)) %>% 
  rbindlist %>%
  fwrite("./output/descriptives/balance_dapil_frac.csv")


dapil_frac = lapply(cov_list, function(x)
  balance_tests(x, close_IT$IT_win*1, close_IT$kab_2000)) %>%
  rbindlist(idcol = 'covariate')


#To estimate imbalance on district-level attributes, 
#we aggregate data to the 2000 kabupaten to correctly account for dependence in the errors
agg_close_IT = mergedTable_frac_balance[, list(close_pct = mean(sample_ni_it, na.rm = T), 
                                               close_win_pct = mean(sample_ni_it & IT_win, na.rm = T),
                                               relseg_d = mean(relseg_d, na.rm = T),
                                               ethseg_d = mean(ethseg_d, na.rm = T)) , by = kab_2000]

frac_district_balance = list(lm(relseg_d ~ close_win_pct + close_pct, agg_close_IT[close_pct > 0]),
     lm(ethseg_d ~ close_win_pct + close_pct, agg_close_IT[close_pct > 0]))

add = data.table(covariate = c("District Religious Segregation", "District Ethnic Segregation"),
                 method = "lm",
                 p = frac_district_balance %>% lapply(function(x) summary(x)$coefficients[2,4]) %>% unlist)


#Combine fractionalization balance tests
dapil_frac = rbind(dapil_frac, add)

pdf('./output/figures/figure_d5.pdf')
p = ggplot(dapil_frac, aes(p, covariate, group = method)) +
  geom_jitter(aes(shape = method), width = 0, height = 0.1, size = 3) + 
  scale_shape_manual(values = c('C', 'K', 'L', 'N')) + 
  scale_x_continuous(name="p-value", limits=c(0, 1)) +
  theme_bw() + geom_vline(xintercept = 0.05)
print(p)
dev.off()


#Clean up
drop = setdiff(ls(), c(old_ws)) 
rm(list = drop)